from pyspark.sql.session import SparkSession

from pyspark.sql.functions import *
from pyspark.sql.window import Window

"""
表1
姓名,科目,分数
name,item,score
张三,数学,33
张三,英语,77
李四,数学,66
李四,英语,78


表2
姓名,数学,英语
name,math,english
张三,33,77
李四,66,78

    1、将表1转化成表2
    2、将表2转化成表1
"""
spark = SparkSession \
    .builder \
    .master("local") \
    .appName("demo7_student") \
    .config("spark.sql.shuffle.partitions", 1) \
    .getOrCreate()

student_score = spark \
    .read \
    .format("csv") \
    .schema("name STRING,item STRING,score DOUBLE") \
    .load("../../data/student_score.txt")

# 1、将表1转化成表2
score_student = student_score \
    .groupBy("name") \
    .agg(
    sum(when(col("item") == "数学", col("score")).otherwise("0")).alias("math"),
    sum(when(col("item") == "英语", col("score")).otherwise("0")).alias("english")
)

score_student.show()

# 2、将表2转化成表1
score_student \
    .select("name",
            explode(create_map(expr("'数学'"), "math", expr("'英语'"), "english")).alias("item", "score")).show()
